#!/usr/bin/env -S uv run --script
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///
# SEE: https://peps.python.org/pep-0723/
# SEE: https://docs.astral.sh/uv/guides/scripts/#using-a-shebang-to-create-an-executable-file
# USAGE: convert_gherkin-languages.py
"""
Generates I18N python module based on `cucumber`_ `gherkin-languages.json`_.

.. _cucumber: https://github.com/cucumber/common
.. _gherkin: https://github.com/cucumber/gherkin
.. _`gherkin-languages.json`: https://raw.githubusercontent.com/cucumber/gherkin/main/gherkin-languages.json

.. seealso::

    * https://github.com/cucumber/gherkin/blob/main/gherkin-languages.json
    * https://raw.githubusercontent.com/cucumber/gherkin/main/gherkin-languages.json
    * https://github.com/cucumber/common

.. note::

    BASED ON: convert_i18n_yaml.py
"""

import argparse
import json
import os.path
import pprint
import sys
from codecs import open
from urllib.request import urlopen

# -- LOCATION: Directory part and filename part of this script's path.
HERE, NAME = os.path.split(__file__)
__version__ = "1.1.0"

# -- GHERKIN: Names of the step-keyword attributes and the upstream data source.
STEP_KEYWORDS = ("and", "but", "given", "when", "then")
GHERKIN_LANGUAGES_JSON_URL = (
    "https://raw.githubusercontent.com/cucumber/gherkin/main/gherkin-languages.json"
)



def download_file(source_url, filename=None):
    """Download the resource at a URL and store it unchanged in a local file.

    The payload is copied byte-for-byte (no decoding or re-encoding).

    :param source_url: URL to fetch (any scheme that ``urlopen`` supports).
    :param filename: Destination file path (optional).
        If omitted, the last path segment of the URL is used
        (relative to the current working directory).
    :return: Path of the file that was written.
    :raises ValueError: If no filename is given and none can be derived.
    """
    # -- NOTE: The module-level name "open" is "codecs.open" in this file;
    # use "io.open" (the builtin) explicitly for plain binary output.
    import io

    # -- MISSING: Explicit PROXY support (only the urllib defaults apply).
    if not filename:
        from urllib.parse import unquote, urlsplit

        filename = os.path.basename(unquote(urlsplit(source_url).path))
        if not filename:
            raise ValueError("Cannot derive filename from URL: %s" % source_url)

    with urlopen(source_url) as response:
        contents = response.read()
    # -- ENSURE: Destination file is closed, even if writing fails.
    with io.open(filename, "wb") as dest_file:
        dest_file.write(contents)
    return filename

# -- DISABLED:
# def yaml_normalize(data):
#     for part in data:
#         keywords = data[part]
#         for k in keywords:
#             v = keywords[k]
#             # bloody YAML parser returns a mixture of unicode-string and bytes
#             if not isinstance(v, str):
#                 v = v.decode("UTF-8")
#             keywords[k] = v.split("|")
#     return data


def data_normalize(data, verbose=False):
    """Normalize "gherkin-languages.json" data into the internal format.

    For each language, the attribute "scenarioOutline" is renamed to
    "scenario_outline". All other attributes (including the step keywords
    with their trailing space, if any) are kept as they are.

    The data is modified in-place. Languages that are already normalized
    are left untouched, so the function can be applied more than once.

    :param data: Language data (as dictionary: language => keywords dict).
    :param verbose: If true, print each language name while processing.
    :return: Normalized data (same dictionary object as ``data``).
    :raises KeyError: If a language has neither "scenarioOutline"
        nor "scenario_outline".
    """
    for language, lang_keywords in data.items():
        if verbose:
            print(f"Language: {language} ...")

        # -- STEP: Normalize attribute "scenarioOutline" => "scenario_outline"
        if "scenarioOutline" in lang_keywords:
            lang_keywords["scenario_outline"] = lang_keywords.pop("scenarioOutline")
        elif "scenario_outline" not in lang_keywords:
            # -- MALFORMED DATA: Keep failing loudly (same error as lookup).
            raise KeyError("scenarioOutline")
    return data


def gherkin_languages_to_python_module(gherkin_languages_path, output_file=None,
                                       encoding=None, verbose=False):
    """Workhorse.
    Performs the conversion from "gherkin-languages.json" to "i18n.py".
    Writes output to file or console (stdout).

    The generated module consists of a comment/docstring header, followed by
    the pretty-printed (normalized) language data assigned to ``languages``.

    :param gherkin_languages_path: File path for JSON file (read as UTF-8).
    :param output_file:     Output filename (or STDOUT for: None, "stdout", "-")
    :param encoding:        Optional output encoding to use (default: UTF-8).
    :param verbose:         Enable verbose mode (as bool; optional).
    """
    # -- NOTE: The module-level name "open" is "codecs.open" in this file
    # (deprecated since Python 3.14); use "io.open" (the builtin) instead.
    import io

    if encoding is None:
        encoding = "UTF-8"

    # -- STEP 1: Load JSON data (ENSURE: Input file is closed afterwards).
    json_encoding = "UTF-8"
    with io.open(gherkin_languages_path, encoding=json_encoding) as json_file:
        languages = json.load(json_file)
    languages = data_normalize(languages, verbose=verbose)

    # -- STEP 2: Generate python module with i18n data.
    header = '''# -*- coding: {encoding} -*-
# -- GENERATED BY: convert_gherkin-languages.py
# FROM:   "gherkin-languages.json"
# SOURCE: {gherkin_languages_json_url}
# pylint: disable=line-too-long, too-many-lines, missing-docstring, invalid-name
# ruff: noqa: E501
"""
Gherkin keywords in the different I18N languages, like:

* English
* French
* German
* ...
"""

languages = \\
'''.format(gherkin_languages_json_url=GHERKIN_LANGUAGES_JSON_URL, encoding=encoding)
    module_text = header + pprint.pformat(languages) + "\n"

    # -- STEP 3: Write python module to console or file.
    if not output_file or output_file in ("-", "stdout"):
        # -- CONSOLE: Not owned by this function, therefore never closed here.
        sys.stdout.write(module_text)
        return

    # -- NEWLINES: newline="\n" disables newline translation on output,
    # as before (when the file was written in binary mode via a codec).
    with io.open(output_file, "w", encoding=encoding, newline="\n") as i18n_py:
        i18n_py.write(module_text)


def main(args=None):
    """Main function to generate the "behave/i18n.py" module
    from the "gherkin-languages.json" file.

    The progress message is printed to stdout, unless the generated module
    itself is written to stdout ("-" or "stdout" as output file). In this case,
    the message is printed to stderr to keep the generated output usable.

    :param args:  List of command-line args (if None: Use ``sys.argv``)
    :return: 0, on success.
    :raises SystemExit: On failure (with the error message as exit argument)
        or if the command-line args are invalid / the JSON file is missing.
    """
    if args is None:
        args = sys.argv[1:]
    parser = argparse.ArgumentParser(prog=NAME,
                description="Generate python module i18n from JSON based data")
    parser.add_argument("-d", "--data", dest="json_file",
                        default=os.path.join(HERE, "gherkin-languages.json"),
                        help="Path to gherkin-languages.json file.")
    parser.add_argument("-e", "--encoding", dest="encoding",
                        default="UTF-8",
                        help="Output encoding.")
    parser.add_argument("--verbose", dest="verbose", default=False,
                        action="store_true",
                        help="Enable verbose mode.")
    parser.add_argument("output_file", default="i18n.py", nargs="?",
                help="Filename of Python I18N module (as output).")
    parser.add_argument("--version", action="version", version=__version__)

    options = parser.parse_args(args)
    if not os.path.isfile(options.json_file):
        parser.error("JSON file not found: %s" % options.json_file)
    if not options.output_file:
        # -- CASE: Empty string was passed as output file.
        options.output_file = "i18n.py"

    # -- AVOID: Progress message that is mixed into the generated module.
    writes_to_stdout = options.output_file in ("-", "stdout")
    progress_stream = sys.stderr if writes_to_stdout else sys.stdout
    print("Writing %s .." % options.output_file, file=progress_stream)

    try:
        gherkin_languages_to_python_module(options.json_file, options.output_file,
                                           encoding=options.encoding,
                                           verbose=options.verbose)
    except Exception as e:
        # -- TOP-LEVEL BOUNDARY: Report any failure as short error message.
        message = "%s: %s" % (e.__class__.__name__, e)
        sys.exit(message)
    return 0


# ---------------------------------------------------------------------------
# AUTO-MAIN:
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # -- SCRIPT MODE: Use the result of main() as process exit status.
    raise SystemExit(main())
